*===============================================================================
*
*					WORKER BELIEFS ABOUT OUTSIDE OPTIONS
*		(c)	Simon Jaeger, Christopher Roth, Nina Roussille, Benjamin Schoefer
*							  2023 December 5
*						   	   SOEP-IAB Data 
*
*===============================================================================


********************************************************************************
*								SOEP DATA	 								   *
********************************************************************************

* Close any log left open by an earlier run (capture: no error if none is open),
* then open this script's log. The path is quoted so that a ${log} directory
* whose name contains spaces is still read as a single file name, in line with
* the quoted data paths used everywhere else in this file.
cap log close
log using "${log}/6_soep.log", replace
* fix the random-number seed
set seed 6000


***************************************
**  I/ Load and clean the data       **
***************************************

forvalues i = 19/20 {

	* open the wave-specific IVER file (the 2019 file name carries an extra "F")
	if `i' == 19 {
		use "$orig/Inno`i'F_IVER.dta", clear
	}
	else {
		use "$orig/Inno`i'_IVER.dta", clear
	}

	* ---------------------------------------------------------------------
	* recode and rename: variables present in both waves
	* ---------------------------------------------------------------------

	* sex is recoded 1 -> 0 and 2 -> 1 and stored as a female dummy
	recode lsex (1 = 0) (2 = 1)
	rename lsex female
	rename lgeb yob
	rename lgebm mob
	rename perw employment

	* 1/2 items turned into 0/1 dummies (2 -> 0)
	recode iverd00 (2 = 0)
	rename iverd00 samejob
	recode iverd10 (2 = 0)
	rename iverd10 of_nego

	* beliefs about similar workers: salary and work environment
	rename iverd01_1 salary_less_proba
	rename iverd01_2 salary_same_proba
	rename iverd01_3 salary_more_proba
	rename iverd02_1 environment_less_proba
	rename iverd02_2 environment_same_proba
	rename iverd02_3 environment_more_proba

	* salary change when switching in / out
	rename iverd03a_1 salary_switchin
	rename iverd03a_1a salary_switchin_less_pct
	rename iverd03a_1b salary_switchin_more_pct
	rename iverd03a_2 salary_switchout
	rename iverd03a_2a salary_switchout_less_pct
	rename iverd03a_2b salary_switchout_more_pct

	* salary guess, confidence, job search and negotiation
	rename iverd03b salary_guess
	rename iverd04 confidence
	rename iverd06 switchout_proba
	rename iverd07 negotiate_proba
	rename iverd08 negotiate_pct
	rename iverd09 retention_pct

	* quitting thresholds and pay expected after quitting
	rename iverd12 paycut_quit_pct
	rename iverd13 of_quit_pct
	rename iverd14a newpay_quit
	rename iverd14a_1 newpay_quit_more_amount
	rename iverd14a_2 newpay_quit_less_amount
	rename iverd14b_1 newpay_quit_more_proba
	rename iverd14b_2 newpay_quit_same_proba
	rename iverd14b_3 newpay_quit_less_proba

	* ---------------------------------------------------------------------
	* recode and rename: wave-specific variables
	* ---------------------------------------------------------------------

	* the ten "why stay" items are iverd11_1..iverd11_10 in the 2019 file and
	* iverd11b_1..iverd11b_10 in the 2020 file; the "not specified" item is
	* iverd11_ka in 2019 and iverd11bka in 2020
	if `i' == 19 {
		local stem "iverd11_"
		local notspecified "iverd11_ka"
	}
	else {
		local stem "iverd11b_"
		local notspecified "iverd11bka"
	}
	local k = 0
	foreach reason in colleagues endowment fearnew atmosphere schedule jobsecurity obligation difficulty location othereason {
		local ++k
		rename `stem'`k' `reason'
	}
	rename `notspecified' reasonnotspecified

	if `i' == 19 {
		* treatment indicator is only renamed for the 2019 file (2 -> 0)
		recode iverd15 (2 = 0)
		rename iverd15 treat
		* for one observation, it looks like an annual rather than a monthly salary
		replace salary_guess = salary_guess / 12 if salary_guess == 57000
	}

	* ---------------------------------------------------------------------
	* variable labels
	* ---------------------------------------------------------------------
	label variable hid "Current Wave HH Number"
	label variable cid "Case-ID, Original Household Number"
	label variable pid "Never Changing Person ID"
	label variable stichu "Innovation survey wave"
	label variable syear "survey year"
	label variable female "Female"
	label variable yob "Year of Birth"
	label variable mob "Month of Birth"
	label variable employment "Employment status"
	label variable samejob "Same job as past year dummy"
	label variable salary_less_proba "Pct of similar workers with lower salary"
	label variable salary_same_proba "Pct of similar workers with same salary"
	label variable salary_more_proba "Pct of similar workers with higher salary"
	label variable environment_less_proba "Pct of similar workers with worse work environment"
	label variable environment_same_proba "Pct of similar workers with same work environment"
	label variable environment_more_proba "Pct of similar workers with better work environment"
	label variable salary_switchin "Salary change when switching in"
	label variable salary_switchin_less_pct "Pct decrease in salary when switching in"
	label variable salary_switchin_more_pct "Pct increase in salary when switching in"
	label variable salary_switchout "Salary change when switching out"
	label variable salary_switchout_less_pct "Pct decrease in salary when switching out"
	label variable salary_switchout_more_pct "Pct increase in salary when switching out"
	label variable salary_guess "Guess on the monthly earnings of employees in similar job"
	label variable confidence "Confidence about the monthly earnings guess"
	* treat exists only in the 2019 file, hence capture
	capture label variable treat "Treatment group"
	label variable switchout_proba "Proba to find a job at another company in the next 12 months"
	label variable negotiate_proba "Proba to ask for a raise in the next 12 months"
	label variable negotiate_pct "Raise expectations for the next calendar year"
	label variable retention_pct "Current employer's pay raise to retain you"
	label variable of_nego "Ability to negotiate current earnings if outside offer is 30% higher"
	label variable colleagues "Why stay: I don't want to lose my colleagues"
	label variable endowment "Why stay: I don't like change"
	label variable fearnew "Why stay: I don't want to adapt to a new job"
	label variable atmosphere "Why stay: I like my current work environment"
	label variable schedule "Why stay: I like my current hours"
	label variable jobsecurity "Why stay: I have a secure job"
	label variable obligation "Why stay: I feel obliged to stay"
	label variable difficulty "Why stay: unlikely to find new job with better pay"
	label variable location "Why stay: would have to move"
	label variable othereason "Why stay: other"
	label variable reasonnotspecified "Why stay: not specified "
	label variable paycut_quit_pct "Min pct cut at current firm to quit"
	label variable of_quit_pct "Min pct raise at another firm to quit"
	label variable newpay_quit "Change in salary if quit and have to find a new job"
	label variable newpay_quit_more_amount "Salary increase if quit and have to find a new job"
	label variable newpay_quit_less_amount "Salary decrease if quit and have to find a new job"
	label variable newpay_quit_more_proba "Proba earn more at another employer"
	label variable newpay_quit_same_proba "Proba earn same at another employer"
	label variable newpay_quit_less_proba "Proba earn less at another employer"

	* ---------------------------------------------------------------------
	* value labels
	* ---------------------------------------------------------------------
	label define femalelab 0 "Male" 1 "Female"
	label values female femalelab

	label define employmentlab 1 "Fully employed" 2 "In part-time employment" 3 "In training or apprenticeship" ///
		4 "Irregularly employed" 5 "In partial retirement" 7 "In the federal voluntary service" 8 "Disabled" ///
		9 "Not employed"
	label values employment employmentlab

	* captured because treat is only available in the 2019 file
	capture label define treatment 0 "Control" 1 "Treatment"
	capture label values treat treatment

	label define level 1 "decrease" 2 "same" 3 "increase"
	label values salary_switchin salary_switchout level

	label define pctpositive 1 "0-2%" 2 "2-5%" 3 "5-10%" 4 "10-15%" 5 "15-20%" 6 "20-30%" 7 "30-50%" 8 "50-75%" 9 ">75%"
	label values salary_switchin_more_pct salary_switchout_more_pct pctpositive

	label define pctnegative 1 "-0-2%" 2 "-2-5%" 3 "-5-10%" 4 "-10-15%" 5 "-15-20%" 6 "-20-30%" 7 "-30-50%" 8 "-50-75%" 9 "> -75%"
	label values salary_switchin_less_pct salary_switchout_less_pct pctnegative

	label define pctalt 1 "0" 2 "0-2%" 3 "2-5%" 4 "5-10%" 5 "10-15%" 6 ">15%"
	label values negotiate_pct pctalt

	label define levelalt 1 "increase" 2 "same" 3 "decrease"
	label values newpay_quit levelalt

	label define amountpositive 1 "0-50 euros" 2 "50-100 euros" 3 "100-200 euros" 4 "200-300 euros" 5 "300-400 euros" 6 "400-500 euros" 7 "500-750 euros" 8 "750-1000 euros" 9 "1000-1500 euros" 10 "1500-2000 euros" 11 "2000-3000 euros" 12 ">3000 euros"
	label values newpay_quit_more_amount amountpositive

	label define amountnegative 1 "-0-50 euros" 2 "-50-100 euros" 3 "-100-200 euros" 4 "-200-300 euros" 5 "-300-400 euros" 6 "-400-500 euros" 7 "-500-750 euros" 8 "-750-1000 euros" 9 "-1000-1500 euros" 10 "-1500-2000 euros" 11 "-2000-3000 euros" 12 ">-3000 euros"
	label values newpay_quit_less_amount amountnegative

	label define yesno 0 "no" 1 "yes"
	label values of_nego samejob yesno

	label define confident 1 "Not confident" 2 "Somewhat not confident" 3 "Somewhat confident" 4 "Confident" 5 "Very confident"
	label values confidence confident

	* drop variables that only have missing values
	drop iverd01_ka iverd02_ka iverd14b_ka

	* park the cleaned wave in a temporary file for the later merges
	tempfile temp`i'
	save `temp`i''

	* 2020 only: attach the occupation code (KLDB2010) recorded in the 2019 file.
	* Persons found only in the 2019 file are dropped again, and the result is
	* stored under a freshly declared temp20 tempfile.
	if `i' == 20 {
		use "$orig/Inno19F_IVER.dta", clear
		keep KLDB2010 pid
		merge 1:1 pid using `temp20'
		keep if _merge != 1
		drop _merge
		tempfile temp20
		save `temp20'
	}

}
	*******************************************************************
	**  III/ Merge in personal information from the IS current wave **
	*******************************************************************
	
	* rename and relabel variables 
forvalues i = 19/20 {

	* person-level file of the wave: *_PE in 2019, *_PB in 2020
	if `i' == 19 {
		use "$orig/Inno`i'F_PE.dta", clear
	}
	else {
		use "$orig/Inno`i'_PB.dta", clear
	}

	rename pbrut salary
	label variable salary "gross salary last month"

	* Best-effort block: if any command inside fails, the error is shown
	* (noisily) and the remaining commands of the block, including the keep,
	* are skipped; the do-file then carries on with the merge below.
	capture noisily {
		rename pfamst marital
		* bring the wave-specific marital codes onto the coding labelled below
		if `i' == 20 {
			recode marital (5=2) (6=5)
		}
		else {
			recode marital (6=2) (8=1)
		}
		label define maritals 1 "Married" 2 "Same sex marriage" 3 "Single, never married" 4 "Divorced" 5 "Widowed"
		label values marital maritals
		label variable marital "Marital status"
		keep pid salary marital pseitj
	}

	* add the cleaned survey answers; persons without survey answers are dropped
	merge 1:1 pid using `temp`i''
	keep if _merge != 1
	drop _merge

	tempfile temp2`i'
	save `temp2`i''
}
	
	
**************************************************************
**  II/ Merge in occupation-level median salary information  **
**************************************************************

* Occupation-level median salaries, keyed by the 5-digit KldB 2010 code.
use "$orig/../kldb_2010_5_median_salaries_for_IAB.dta", clear

* harmonise names with the survey files, then discard the cell-size column
rename kldb2010_5 KLDB2010
rename median_gross_monthly_wage_2018 salary_median
drop N_observations

* one row per occupation code so that the 1:m merge below is valid
duplicates drop KLDB2010, force

tempfile temp3
save `temp3'

* For each wave, attach the occupation-level median salary to the person-level
* file built above and recode the negative missing-value codes.
forvalues i = 19/20 {
	* "clear" makes the reload independent of whether the data in memory
	* happen to be unchanged since the last save
	use `temp3', clear

	* medians are the master (one row per KLDB2010), persons are the using file;
	* occupations with no respondent (_merge == 1) are dropped
	merge 1:m KLDB2010 using `temp2`i''
	keep if _merge != 1
	drop _merge

	* missing values are coded either as -1 or -2 or -5, replace them with "."
	* (capture lets the loop pass over string variables, where the numeric
	* comparison raises an error)
	foreach var of varlist _all {
		cap replace `var' = . if `var' == -1 | `var' == -2 | `var' == -5
	}

	tempfile temp3`i'
	save `temp3`i''
}
	


	
	  
* For each wave: build the analysis variables from the merged file, save a
* "long" version with plain variable names, then a version in which every
* variable except pid/cid/hid carries a _y<wave> suffix.
forvalues i = 19/20 {
	* temp3`i' already contains everything stored in temp2`i' plus the median
	* salaries, so it is the only file that needs to be loaded
	use `temp3`i'', clear
	
	*******************************************
	**  IV/ New variable generation  **
	*******************************************
	
	* both 2019 and 2020
	
	* guess vs. occupation median
	gen d_guess_median = salary_guess - salary_median 
	gen d_guess_median_pct = ((salary_guess - salary_median) / salary_median) * 100
	gen l_d_guess_median = log(salary_guess) - log(salary_median)
	
	* guess vs. own salary (salary is still on its original scale here; it is
	* multiplied by 12 further below)
	gen d_guess_own = salary_guess - salary
	gen d_own_guess = - d_guess_own
	gen d_guess_own_fulltime = d_guess_own if employment == 1 
	gen d_guess_own_pct = ((salary_guess - salary) / salary) * 100
	gen d_guess_own_fulltime_pct = d_guess_own_pct if employment == 1 
	gen l_d_guess_own = log(salary_guess) - log(salary)
	gen l_d_own_guess = -l_d_guess_own
	
	* own salary vs. occupation median
	gen d_own_median = salary - salary_median
	gen d_own_median_fulltime = d_own_median if employment == 1  
	gen d_own_median_pct = ((salary - salary_median) / salary_median) * 100
	gen d_own_median_fulltime_pct = d_own_median_pct if employment == 1  
	gen l_d_own_median = log(salary) - log(salary_median)
	
	* signed bracket codes: positive = increase bracket, negative = decrease
	* bracket, 0 = "same"
	gen salary_switchin_change_pct =  salary_switchin_more_pct if !mi(salary_switchin_more_pct)
	replace salary_switchin_change_pct = - salary_switchin_less_pct if !mi(salary_switchin_less_pct)
	replace salary_switchin_change_pct = 0 if salary_switchin == 2
	
	gen salary_switchout_change_pct =  salary_switchout_more_pct  if !mi(salary_switchout_more_pct)
	replace salary_switchout_change_pct = - salary_switchout_less_pct if !mi(salary_switchout_less_pct)
	replace salary_switchout_change_pct = 0 if salary_switchout == 2
	
	gen newpay_quit_change = newpay_quit_more_amount if !mi(newpay_quit_more_amount)
	replace newpay_quit_change = - newpay_quit_less_amount if !mi(newpay_quit_less_amount)
	replace newpay_quit_change = 0 if newpay_quit == 2
	
	label define pctchange  -9 "> - 75%" -8 "-50-75%" -7 "-30-50%" -6 "-20-30%" -5 "-15-20%" -4 "-10-15%" -3 "-5-10%" -2 "-2-5%" ///
	-1 "-0-2%" 0 "0%" 1 "0-2%" 2 "2-5%" 3 "5-10%" 4 "10-15%" 5 "15-20%" 6 "20-30%" 7 "30-50%" 8 "50-75%" 9 ">75%"
	label values salary_switchout_change_pct pctchange
	label values salary_switchin_change_pct pctchange
	* the -8 bracket is a decrease, so its text carries a minus sign like every
	* other negative bracket
	label define amountchange -12 "> -3000 euros" -11 "-2000-3000 euros" -10 "-1500-2000 euros" -9 "-1000-1500 euros" ///
	-8 "-750-1000 euros" -7 "-500-750 euros" -6 "-400-500 euros" -5 "-300-400 euros" -4 "-200-300 euros" ///
	-3 "-100-200 euros" -2 "-50-100 euros" -1 "-0-50 euros" 0 "0 euros" 1 "0-50 euros" 2 "50-100 euros" 3 "100-200 euros" ///
	4 "200-300 euros" 5 "300-400 euros" 6 "400-500 euros" 7 "500-750 euros" 8 "750-1000 euros" 9 "1000-1500 euros" ///
	10 "1500-2000 euros" 11 "2000-3000 euros" 12 ">3000 euros"
	label values newpay_quit_change amountchange
	
	
	* bracket midpoints (in %) for the switching-in / switching-out salary change;
	* the open top bracket (>75%) is set to 87.5
	foreach val in "in" "out" {
		gen salary_switch`val'_change_mid= .
		foreach sign in "+" "-" {
			replace salary_switch`val'_change_mid = `sign'87.5 if salary_switch`val'_change_pct == `sign'9
			replace salary_switch`val'_change_mid = `sign'62.5 if salary_switch`val'_change_pct == `sign'8
			replace salary_switch`val'_change_mid = `sign'40 if salary_switch`val'_change_pct == `sign'7
			replace salary_switch`val'_change_mid = `sign'25 if salary_switch`val'_change_pct == `sign'6
			replace salary_switch`val'_change_mid = `sign'17.5 if salary_switch`val'_change_pct == `sign'5
			replace salary_switch`val'_change_mid = `sign'12.5 if salary_switch`val'_change_pct == `sign'4
			replace salary_switch`val'_change_mid = `sign'7.5 if salary_switch`val'_change_pct == `sign'3
			replace salary_switch`val'_change_mid = `sign'3.5 if salary_switch`val'_change_pct == `sign'2
			replace salary_switch`val'_change_mid = `sign'1 if salary_switch`val'_change_pct == `sign'1
		}
		* the zero bracket has no sign, so it is set once outside the sign loop
		replace salary_switch`val'_change_mid = 0 if salary_switch`val'_change_pct == 0
	}
	
	* bracket midpoints (in euros) for the pay change after quitting; the open
	* top bracket (>3000 euros) is set to 3500
	gen newpay_quit_mid = .
	foreach sign in "+" "-" {
		replace newpay_quit_mid = `sign'3500 if newpay_quit_change == `sign'12
		replace newpay_quit_mid = `sign'2500 if newpay_quit_change == `sign'11
		replace newpay_quit_mid = `sign'1750 if newpay_quit_change == `sign'10
		replace newpay_quit_mid = `sign'1250 if newpay_quit_change == `sign'9
		replace newpay_quit_mid = `sign'875 if newpay_quit_change == `sign'8
		replace newpay_quit_mid = `sign'625 if newpay_quit_change == `sign'7
		replace newpay_quit_mid = `sign'450 if newpay_quit_change == `sign'6
		replace newpay_quit_mid = `sign'350 if newpay_quit_change == `sign'5
		replace newpay_quit_mid = `sign'250 if newpay_quit_change == `sign'4
		replace newpay_quit_mid = `sign'150 if newpay_quit_change == `sign'3
		replace newpay_quit_mid = `sign'75 if newpay_quit_change == `sign'2
		replace newpay_quit_mid = `sign'25 if newpay_quit_change == `sign'1
	}
	replace newpay_quit_mid = 0 if newpay_quit_change == 0
	
	* bracket midpoints (in %) for the expected raise; the open top bracket
	* (>15%) is set to 20
	gen negotiate_pct_mid = 0 if negotiate_pct == 1
	replace negotiate_pct_mid = 1 if negotiate_pct == 2
	replace negotiate_pct_mid = 3.5 if negotiate_pct == 3
	replace negotiate_pct_mid = 7.5 if negotiate_pct == 4
	replace negotiate_pct_mid = 12.5 if negotiate_pct == 5
	replace negotiate_pct_mid = 20 if negotiate_pct == 6
	
	* tenure
	gen tenure = 20`i' - pseitj 
	*tab tenure
	*cap noisily groups tenure, select(f>20)
	* observations with a negative tenure or a tenure above the survey year are
	* removed from the data; missing tenure is kept
	drop if tenure < 0 | (tenure > 20`i' & !mi(tenure))
	gen tenure_range = 0 if tenure <= 1
	replace tenure_range = 1 if tenure <= 3 & tenure > 1
	replace tenure_range = 2 if tenure <= 6 & tenure > 3
	replace tenure_range = 3 if tenure <= 10 & tenure > 6
	replace tenure_range = 4 if tenure <= 15 & tenure > 10
	replace tenure_range = 5 if tenure <= 20 & tenure > 15
	replace tenure_range = 6 if tenure <= 30 & tenure > 20
	replace tenure_range = 7 if tenure > 30 & !mi(tenure)
	* category 2 covers tenure in (3, 6], so its text reads "3-6"
	label define tenure_ranges 0 "0-1" 1 "1-3" 2 "3-6" 3 "6-10" 4 "10-15" 5 "15-20" 6 "20-30" 7 "30+" 
	label values tenure_range tenure_ranges
	
	* age
	gen age = 20`i' - yob
	gen age2 = age^2
	
	* full time
	gen fulltime = (employment ==1)
	label define fulltimes 0 "Part-time" 1 "Full-time"
	label values fulltime fulltimes
	
	* demographics
	* from here on salary is 12 times the value loaded from the person file;
	* all d_* and l_d_* differences above were computed before this rescaling
	replace salary = 12*salary
	gen l_salary = log(salary)
	qui su age, d
	gen old = (age >= r(p50) & !mi(age))
	qui su tenure, d
	gen experienced = (tenure >= r(p50) & !mi(tenure))
	gen of_nonego = 1- of_nego
	* NOTE(review): newpay_quit_change holds the signed bracket code (-12..12),
	* not a euro amount; e_newpayquit_pct further below is the version based on
	* bracket midpoints. Confirm that this variable is intended as is.
	gen newpay_pct_change = newpay_quit_change/salary * 100
	* marital may be absent if the best-effort block in the personal-information
	* merge was skipped, hence capture
	cap gen married = marital == 1 if !mi(marital)
	gen ft_employment = employment == 1 if !mi(employment)
	gen pt_employment = employment == 2 if !mi(employment)
	gen employed = (ft_employment==1 | pt_employment==1)
	
	* reservation wages
	gen worker_rw = salary*(100-paycut_quit_pct)/100
	gen firm_rw = salary*(100+retention_pct)/100
	gen oo_rw = salary*(100+of_quit_pct)/100
	gen l_worker_rw = log(worker_rw)
	gen l_firm_rw = log(firm_rw)
	gen l_oo_rw = log(oo_rw)
		
	* surplus
	gen surplus = firm_rw - worker_rw
	gen beta = (salary - worker_rw) / surplus
	gen w_surplus = beta * surplus
	  
	gen f_surplus = (1-beta) * surplus
	gen surplus_share = surplus / firm_rw
	gen surplus_pct = surplus/salary*100
	gen w_surplus_pct = w_surplus/salary*100
	gen f_surplus_pct = f_surplus/salary*100
	gen l_surplus = log(firm_rw-worker_rw)
	gen l_w_surplus = log(w_surplus)
	gen l_f_surplus = log(f_surplus)
	
	*  switching cost
	gen switching_cost = (oo_rw-firm_rw)/firm_rw
	
	* define analysis sample
	gen asample = 1
	replace asample = 0 if employment != 1  // keep only full-time
	replace asample = 0 if mi(salary)
	replace asample = 0 if mi(worker_rw)
	replace asample = 0 if mi(firm_rw)
	replace asample = 0 if mi(of_nego)
	replace asample = 0 if salary < 0
	replace asample = 0 if firm_rw > 900000
	replace asample = 0 if worker_rw < 100
	 
	* midpoint of the pay change after quitting, times 12, relative to salary
	gen e_newpayquit_pct = (newpay_quit_mid*12)/salary * 100  
	  
	* amenity and monetary gap
	gen monetary_surplus_pct = - e_newpayquit_pct
	gen amenity_surplus_pct = w_surplus_pct - monetary_surplus_pct
	gen amenity_share = (amenity_surplus_pct / w_surplus_pct) * 100
	*tab amenity_share
	gen monetary_share = (monetary_surplus_pct/ w_surplus_pct) * 100
	*tab monetary_share
	gen amenity_surplus = amenity_share*w_surplus/100
	gen monetary_surplus = monetary_share*w_surplus/100
	gen l_amenity_surplus = ln(amenity_surplus)
	gen l_monetary_surplus = ln(monetary_surplus)
	  
	****************winsorized measures  
	* (winsor is not a built-in Stata command; it has to be installed)
	  
	foreach var of varlist *monetary_surplus* *amenity_surplus* amenity_share monetary_share e_newpayquit_pct* w_surplus* {
		quietly winsor `var', p(.02) gen(`var'_w2)
		* winsor `var', p(.01) gen(`var'_w1)
		quietly winsor `var', p(.05) gen(`var'_w5)
		quietly winsor `var', p(.1) gen(`var'_w10) 
	}
	
	label var w_surplus "Total Worker Surplus (in Euro)"
	label var w_surplus_pct "Worker Surplus (in % of salary)"
	label var monetary_surplus "Monetary Gap (in Euro)"
	label var monetary_surplus_pct "Monetary Gap (in % of salary)"  
	label var amenity_surplus_pct "Amenity Gap (in % of salary)"  
	label var amenity_share "Amenity Share of Surplus"  
	label var monetary_share "Monetary Share of Surplus"  
	label var amenity_surplus "Amenity Gap (in Euro)"  
	label var l_amenity_surplus "Log Amenity Gap (in Euro)"  
	label var l_monetary_surplus "Log Monetary Gap (in Euro)"
	label var  e_newpayquit_pct "Expected Pay Change when Quitting (%)"
		
	  
	foreach j in 2 5 10 {
		label var w_surplus_w`j' "Total Worker Surplus - winsorized `j'"
		label var monetary_surplus_pct_w`j' "Monetary Gap (in %)- winsorized `j'"  
		label var amenity_surplus_pct_w`j' "Amenity Gap (in %)- winsorized `j'"  
		label var amenity_share_w`j' "Amenity Share of Surplus- winsorized `j'"  
		label var monetary_share_w`j' "Monetary Share of Surplus- winsorized `j'"  
		label var amenity_surplus_w`j' "Amenity Gap (in Euro)- winsorized `j'"  
		label var monetary_surplus_w`j' "Monetary Gap (in Euro)- winsorized `j'"  
		label var l_amenity_surplus_w`j' "Log Amenity Gap (in Euro)- winsorized `j'"  
		label var l_monetary_surplus_w`j' "Log Monetary Gap (in Euro)- winsorized `j'"  
		label var e_newpayquit_pct_w`j' "Expected Pay Change when Quitting (%) - winsorized `j'"
	}
	label var surplus_share "Surplus share of value-added"
	label var switching_cost "switching cost (%)"
	
	* treatment only in 2019

	if `i'==19 {
		* over/under-estimation of the occupation median, crossed with treatment
		gen over = (d_guess_median >=0) if !mi(d_guess_median)  
		label define overs 0 "Underestimator" 1 "Overestimator"
		label values over overs
		gen control_under = (over ==0 & treat==0) if !mi(over) & !mi(treat)
		gen control_over = (over ==1 & treat==0) if !mi(over) & !mi(treat)
		gen treat_under = (over ==0 & treat==1) if !mi(over) & !mi(treat)
		gen treat_over = (over ==1 & treat==1) if !mi(over) & !mi(treat)
		
		* own salary relative to the occupation median, crossed with treatment
		gen ownabovemedian =  (d_own_median >=0) if !mi(d_own_median) 
		gen control_lowwage = (ownabovemedian ==0 & treat==0) if !mi(ownabovemedian) & !mi(treat)
		gen control_highwage = (ownabovemedian ==1 & treat==0) if !mi(ownabovemedian) & !mi(treat)
		gen treat_lowwage = (ownabovemedian ==0 & treat==1) if !mi(ownabovemedian) & !mi(treat)
		gen treat_highwage = (ownabovemedian ==1 & treat==1) if !mi(ownabovemedian) & !mi(treat)
		
		gen treat_over_under = 1 if control_under == 1
		replace treat_over_under = 2 if control_over ==1
		replace treat_over_under = 3 if treat_under ==1
		replace treat_over_under = 4 if treat_over ==1
		
		label define treat_over_underlab 1 "Control - underestimate" 2 "Control - overestimate" 3 "Treatment - underestimate" 4 "Treatment - overestimate"
		label values treat_over_under treat_over_underlab
		
		* eight cells: treatment x over/under-estimation x own wage vs. median
		gen treat_heterogeneity = 1 if treat == 0 & over == 0 & ownabovemedian == 0
		replace treat_heterogeneity = 2 if treat == 1 & over == 0 & ownabovemedian == 0
		replace treat_heterogeneity = 3 if treat == 0 & over == 1 & ownabovemedian == 0
		replace treat_heterogeneity = 4 if treat == 1 & over == 1 & ownabovemedian == 0
		replace treat_heterogeneity = 5 if treat == 0 & over == 0 & ownabovemedian == 1
		replace treat_heterogeneity = 6 if treat == 1 & over == 0 & ownabovemedian == 1
		replace treat_heterogeneity = 7 if treat == 0 & over == 1 & ownabovemedian == 1
		replace treat_heterogeneity = 8 if treat == 1 & over == 1 & ownabovemedian == 1
		
		label define treat_heterogeneitylab 1 "Control - underestimate - own below median" 2 "Treatment - underestimate - own below median" ///
		3 "Control - overestimate - own below median" 4 "Treatment - overestimate - own below median" ///
		5 "Control - underestimate - own above median" 6 "Treatment - underestimate - own above median" ///
		7 "Control - overestimate - own above median" 8 "Treatment - overestimate - own above median"
		label values treat_heterogeneity treat_heterogeneitylab
	}

	
	* variable labels; captured because several of these variables exist only
	* in the 2019 wave
	cap label var KLDB2010 "Occupation code"
	cap label var salary_median "Median salary"
	cap label var d_guess_median "Estimated median - Median salary"
	cap label var d_guess_own "Estimated median - Own salary"
	cap label var d_own_guess "Own - Estimated median salary"
	cap label var d_guess_own_fulltime "Estimated median - Own salary, Full time only"
	cap label var l_d_guess_own  "Log Estimated median - Log Own salary"
	cap label var l_d_own_guess  "Log Own salary - Log Estimated median"
	cap label var d_own_median "Own - Median salary"
	cap label var d_own_median_fulltime " Own - Median salary, Full time only"
	cap label var d_guess_median_pct "Bias (in %)"
	cap label var l_d_guess_median "Log Estimated median - Log Median salary"
	cap label var d_guess_own_pct "% Estimated median - Own salary"
	cap label var d_guess_own_fulltime_pct "% Estimated median - Own salary, Full time only"
	cap label var d_own_median_pct "% Own - Median salary"
	cap label var d_own_median_fulltime_pct "% Own - Median salary, Full time only"
	cap label var l_d_own_median "Log Own - Log Median salary"
	cap label var over "Overestimates the median salary"
	cap label var ownabovemedian "Own salary > Median salary"
	cap label var control_under "Control group - underestimators"
	cap label var control_over "Control group - overestimators"
	cap label var treat_under "Treatment group - underestimators"
	cap label var treat_over "Treatment group -  overestimators"
	cap label var treat_over_under "Treatment group, separate over and under estimators"
	cap label var control_lowwage "Control group - own wage below median"
	cap label var control_highwage "Control group - own wage above median"
	cap label var treat_lowwage "Treatment group - own wage below median"
	cap label var treat_highwage "Treatment group -  own wage above median"
	cap label var negotiate_pct_mid "Mid - Raise expectations for the next calendar year"
	cap label var salary_switchin_change_pct "% salary change when switching in"
	cap label var salary_switchout_change_pct "% salary change when switching out"
	cap label var salary_switchin_change_mid "Mid % salary change when switching in"
	cap label var salary_switchout_change_mid "Mid % salary change when switching out"
	cap label var newpay_quit_change "Salary change if quit"
	cap label var newpay_quit_mid  "Mid - Salary change if quit"
	* d_surplus is not generated anywhere in this file, so this line has no
	* effect unless the variable comes in with the input data
	cap label var d_surplus "Difference between pay raise and paycut to quit"
	cap label var tenure_range "Number of years at current employer"
	cap label var age "Age"
	cap label var age2 "Age squared"
	cap label var fulltime "Full time dummy"
	
	
	* NOTE(review): this attaches yesno to "over" as well, replacing the
	* "overs" value label assigned when the variable was created
	if `i'== 19 {
		foreach var of varlist over control_under control_over treat_under treat_over ownabovemedian {
			label values `var' yesno
		}
	}
		
	
	* version with plain variable names
	save "$data/GSOEPIS20`i'_survey_long.dta", replace
	
	* suffix every variable with the wave, then restore the identifiers that
	* later merges rely on
	foreach var of varlist _all {
		ren `var' `var'_y`i' 
	}
		
	foreach var in pid cid hid {
		cap ren `var'_y`i' `var'
	}
	
	
	save "$data/GSOEPIS20`i'_survey.dta", replace
}

*******************************************************
**  III/ merge in personal information from 2018 wave
*******************************************************

* pgen dataset used for number of years of education and highest degree achieved
* Education extract from pgen: one row per person with years of education and
* highest degree achieved.
use "$orig/pgen.dta", clear
keep pid pgbilzt pgcasmin syear
* negative codes -1, -2 and -5 are treated as missing, as in the survey files
foreach var of varlist _all {
	replace `var' = . if `var' == -1 | `var' == -2 | `var' == -5
}
* This is a panel dataset, we select the last year of survey to make sure we have the most up to date data
bys pid (syear): keep if _n ==_N
duplicates drop
ren pgbilzt yearsedu 
ren pgcasmin maxedu

* Attach the education variables to the 2019 survey respondents. Without this
* merge the save below would overwrite the 2019 survey file with the bare pgen
* extract, and "wave" further down would describe pgen membership instead of
* participation in the 2019 survey. Persons who are in pgen but not in the
* 2019 survey (_merge == 1) are dropped; respondents without a pgen record are
* kept with missing education.
merge 1:1 pid using "$data/GSOEPIS2019_survey.dta"
drop if _merge == 1
drop _merge

*******************************************
**  IV/ Order and save dataset  **
*******************************************

* 2019 survey file, now including yearsedu and maxedu
save "$data/GSOEPIS2019_survey.dta", replace

* add the 2020 answers of the same persons; respondents who appear only in the
* 2020 survey (_merge == 2) are not kept
merge 1:1 pid using "$data/GSOEPIS2020_survey.dta"
gen wave = "2019" if _merge == 1
replace wave = "both" if _merge == 3
drop if _merge == 2
drop _merge

save "$data/GSOEPIS_survey.dta", replace

log close
clear
